Setup

Notes

  • Cell suspensions of meningeal dura enriched by MACS for cells expressing Cd45, Cd31, and Lyve-1
    • Cd45/Ptprc/B220/Ly-5/Lyt-4/T200: transmembrane protein tyrosine phosphatase, located on most haematopoietic cells, positive selection of leukocytes
    • Cd31/Pecam1: adhesion and signaling receptor that is expressed on endothelial and hematopoietic cells, positive selection of endothelial cells
    • Lyve1/1200012G08Rik/Lyve-1/Xlkd1/lymphatic vessel endothelial HA receptor-1: found primarily on lymphatic endothelial cells

Libraries

library(dplyr)          # ungroup()
library(ggtree)         # BuildClusterTree()
library(gridExtra)      # grid.arrange()
library(gtools)         # smartbind()
library(parallel)       # detectCores()
library(plotly)         # plot_ly()
library(Seurat)         # Idents()
library(SeuratWrappers) # RunPrestoAll()
library(ShinyCell)      # createConfig()
library(tidyr)          # %>%

Variables

out <- "../../results/pass_1/all_clusters/"
sample_order <- c("E3.M","E3.F","E4.M","E4.F")
sample_colors <- c("#26946A","#1814A1","#EAC941","#DF5F00")
sample_order2 <- c("Male E3", "Male E4", "Female E3", "Female E4")
isoform_order <- c("E4","E3")
isoform_colors <- c("darkgray","cornflowerblue")
sex_order <- c("Male","Female")
sex_colors <- c("darkgray","purple")

Read data

mouse.unannotated <- readRDS("../../rObjects/unannotated_obj.rds")
Idents(mouse.unannotated) <- "seurat_clusters"
DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
## Normalizing layer: counts
mouse.unannotated
## An object of class Seurat 
## 42395 features across 16965 samples within 2 assays 
## Active assay: RNA (21356 features, 0 variable features)
##  2 layers present: data, counts
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap

Unannotated QC

UMAP

cluster_colors <- c("red4","red","orange","gold","lightgreen","chartreuse3","darkgreen",
                    "cyan","steelblue","blue","deeppink","pink","purple","tan","chocolate4",
                    "gray","black")
u1 <- DimPlot(object = mouse.unannotated,
              reduction = "umap",
              shuffle = TRUE,
              repel = TRUE,
              raster = FALSE,
              cols = cluster_colors,
              label = TRUE)
u1

u2 <- DimPlot(object = mouse.unannotated,
              reduction = "umap",
              shuffle = TRUE,
              repel = TRUE,
              dims = c(2,3),
              cols = cluster_colors,
              label = TRUE)
u2

Feature plots

# UMAP percent.mt
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.mt") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP percent.ribo
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.ribo.protein") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP percent.hb
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.hb") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP nCount
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "nCount_RNA") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP nFeature
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "nFeature_RNA") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP cell.complexity
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "cell.complexity") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

Violins

VlnPlot(mouse.unannotated,
        features = "nCount_RNA",
        split.by = "seurat_clusters")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(mouse.unannotated,
        features = "nFeature_RNA",
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "cell.complexity",
        split.by = "seurat_clusters")

Cells per cluster

# Cells per sample per cluster
sample_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "sample")) %>%
  dplyr::count(ident,sample) %>%
  tidyr::spread(ident, n)
sample_ncells
##   sample    0   1   2   3   4   5   6   7   8   9  10  11  12  13  14 15 16
## 1   E3.M  697 578 435 233 296 214 348 255 281 107  57 125  84  61  49 95 57
## 2   E3.F  347 642 503 323 171 206  30 183 284  75 113  36  63  65  20 56 38
## 3   E4.M  676 425 442 247 336 249 325 218 304 128 102  87  64  62  54 49 53
## 4   E4.F 1016 846 690 600 473 561 300 341 108 161 168 154 164 128 185 71 51
# Cells per isoform per cluster
isoform_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "isoform")) %>%
  dplyr::count(ident,isoform) %>%
  tidyr::spread(ident, n)
isoform_ncells
##   isoform    0    1    2   3   4   5   6   7   8   9  10  11  12  13  14  15
## 1      E4 1692 1271 1132 847 809 810 625 559 412 289 270 241 228 190 239 120
## 2      E3 1044 1220  938 556 467 420 378 438 565 182 170 161 147 126  69 151
##    16
## 1 104
## 2  95
# Cells per sex per cluster
sex_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "sex")) %>%
  dplyr::count(ident,sex) %>%
  tidyr::spread(ident, n)
sex_ncells
##      sex    0    1    2   3   4   5   6   7   8   9  10  11  12  13  14  15  16
## 1   Male 1373 1003  877 480 632 463 673 473 585 235 159 212 148 123 103 144 110
## 2 Female 1363 1488 1193 923 644 767 330 524 392 236 281 190 227 193 205 127  89

Gene histogram

# User params
goi <- "Malat1"
threshold <- 1

# Subset data
log2.threshold <- log2(threshold + 0.01)
counts.df <- FetchData(mouse.unannotated, vars = goi)
colnames(counts.df) <- "counts"
log2.counts.df <- log2(counts.df + 0.01)

# Histogram
title <- paste0(goi, "\nnCount_RNA > ", threshold)
hist1 <- ggplot(counts.df, aes(x = counts)) + 
  geom_histogram(bins = 100, fill = "gray", color = "black") + 
  labs(title = title, x=NULL, y=NULL) +
  xlab(paste0(goi, " nCount_RNA")) + ylab("# of Samples") + theme_bw() +
  geom_vline(xintercept = threshold, col = "blue") +
  annotate("rect", 
           xmin = -Inf,
           xmax = threshold, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="chocolate4") +
  annotate("rect", 
           xmin = threshold,
           xmax = Inf, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="deepskyblue")

# Histogram log transformed
hist2 <- ggplot(log2.counts.df, aes(x = counts)) + 
  geom_histogram(bins = 100, fill = "gray", color = "black") + 
  labs(title = title, x=NULL, y=NULL) +
  xlab(paste0("Log2(",goi, " nCount_RNA)")) + ylab("# of Samples") + theme_bw() +
  geom_vline(xintercept = log2.threshold, col = "blue") +
  annotate("rect", 
           xmin = -Inf,
           xmax = log2.threshold, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="chocolate4") +
  annotate("rect", 
           xmin = log2.threshold,
           xmax = Inf, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="deepskyblue")

# plot
plots1 <- list(hist1,hist2)
layout1 <- rbind(c(1),c(2))
grid1 <- grid.arrange(grobs = plots1, layout_matrix = layout1)

Percent gene

# user define variable
goi <- "Malat1"

# Extract counts data
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- "seurat_clusters"
geneData <- FetchData(mouse.unannotated,
                      vars = goi,
                      slot = "counts")
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
geneData <- geneData > 1
table(geneData)
## geneData
## FALSE  TRUE 
##  1248 15717
mouse.unannotated$Expression <- geneData

FetchData(mouse.unannotated,
          vars = c("ident", "Expression")) %>%
  dplyr::count(ident, Expression) %>%
  tidyr::spread(ident, n)
##   Expression    0    1    2    3    4   5    6   7   8   9  10  11  12  13  14
## 1      FALSE  154    4    7    4   NA 753    1  12   8   2  NA  13 286  NA   2
## 2       TRUE 2582 2487 2063 1399 1276 477 1002 985 969 469 440 389  89 316 306
##    15  16
## 1   1   1
## 2 270 198
# Plot
mouse.unannotated@meta.data %>%
  group_by(seurat_clusters, Expression) %>%
  dplyr::count() %>%
  group_by(seurat_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=seurat_clusters,y=percent, fill=Expression)) +
  geom_col() +
  ggtitle(paste0("Percentage of cells with > 1 counts for ", goi)) +
  theme(axis.text.x = element_text(angle = 90))

Cluster tree

  • Cluster trees are helpful in deciding what clusters to merge.
mouse.unannotated <- BuildClusterTree(object = mouse.unannotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)
tree <- mouse.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)

ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Potential Markers

MACS markers

  • Ptprc - located on most haematopoietic cells, positive selection of leukocytes
  • Pecam1 - adhesion and signaling receptor that is expressed on endothelial and hematopoietic cells, positive selection of endothelial cells
  • Lyve1 - found primarily on lymphatic endothelial cells
VlnPlot(mouse.unannotated,
        features = "Ptprc",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Lyve1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Pecam1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

B-cells / Plasma cells

  • Cd19: expressed in B-cells and follicular dendritic cells
  • Fcrla: a B-cell specific protein in mice
  • Cd79a & Cd79b: together form BCR complex
  • Sdc1: Plasma cells
VlnPlot(mouse.unannotated,
        features = "Cd19",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Fcrla",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd79a",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd79b",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Sdc1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

T-cells

  • Trac/Cd3d/Cd3e/Cd3g are components of the TCR
VlnPlot(mouse.unannotated,
        features = "Trac",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd3e",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd8a",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd4",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Endothelial cells

VlnPlot(mouse.unannotated,
        features = "Ly6c1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Flt1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Fibroblasts

VlnPlot(mouse.unannotated,
        features = "Col1a1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Col1a2",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Mast cells

VlnPlot(mouse.unannotated,
        features = "Fcer1a",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Kit", # aka Cd117
        cols = cluster_colors,
        split.by = "seurat_clusters")

Macrophage

VlnPlot(mouse.unannotated,
        features = "C1qa",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "C1qb",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Mki67",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Monocytes/DCs

  • Cd11c/Itgax: Monocytes & DCs
VlnPlot(mouse.unannotated,
        features = "Itgax", # Cd11c
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd209a",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Neutrophils

VlnPlot(mouse.unannotated,
        features = "Ly6g",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Retnlg",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Pericytes & SMCs

VlnPlot(mouse.unannotated,
        features = "Acta2",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Myl9",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Rgs5",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Schwann cells

VlnPlot(mouse.unannotated,
        features = "Cdh19",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Mpz",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Sandro’s markers

Idents(mouse.unannotated) <- "seurat_clusters"
goi <- c("Pdgfrb","Acta2","Vwf","Cldn5","Pecam1","Flt4","Prox1","Lyve1","Ptprc",
         "Cd19","Ms4a1","Ighd","Cd3e","Trbc2","Il7r","Nkg7","Klrb1b","Klrb1c",
         "Gata3","Rora","Itgax","H2-Eb1","Ccr2","Ly6c2","Lyz2","Ly6g","Itgam",
         "Mrc1","Csf1r","Cd38","Mki67","Mcpt4","Ms4a2","Col1a2","Plp1")

v1 <- VlnPlot(mouse.unannotated,
              features = goi,
              cols = cluster_colors,
              split.by = "seurat_clusters",
              flip = TRUE,
              stack = TRUE)
v1

Automatically detect markers

# auto find markers
Idents(mouse.unannotated) <- "seurat_clusters"
all.markers <- SeuratWrappers::RunPrestoAll(
  object = mouse.unannotated,
  assay = "RNA",
  slot = "counts",
  only.pos = TRUE
)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
# filter based on p_val_adj
all.markers <- all.markers[all.markers$p_val_adj < 0.01,]

# subset table to top 2 and top 20 markers
top2 <- Reduce(rbind,
               by(all.markers,
                  all.markers["cluster"],
                  head,
                  n = 2))
top20 <- Reduce(rbind,
               by(all.markers,
                  all.markers["cluster"],
                  head,
                  n = 20))

# plot violin
v1 <- VlnPlot(mouse.unannotated,
        features = top2$gene,
        split.by = "seurat_clusters",
        flip = TRUE,
        stack = TRUE,
        cols = cluster_colors)
v1

# save table
write.table(all.markers, 
            paste0(out, "markers/unannotated_auto_find_markers_adjpval_0.01.tsv"),
            quote = FALSE,
            row.names = FALSE)
# compare 
table(all.markers$cluster)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 1996 3345 3440 3700 3086 1198 2745 1752 1321 2314  962 2203  559 3121  858  655 
##   16 
##  950
# subset based on cluster
cluster0 <- all.markers[all.markers$cluster == 0,]
cluster1 <- all.markers[all.markers$cluster == 1,]
cluster2 <- all.markers[all.markers$cluster == 2,]
cluster3 <- all.markers[all.markers$cluster == 3,]
cluster4 <- all.markers[all.markers$cluster == 4,]
cluster5 <- all.markers[all.markers$cluster == 5,]
cluster6 <- all.markers[all.markers$cluster == 6,]
cluster7 <- all.markers[all.markers$cluster == 7,]
cluster8 <- all.markers[all.markers$cluster == 8,]
cluster9 <- all.markers[all.markers$cluster == 9,]
cluster10 <- all.markers[all.markers$cluster == 10,]
cluster11 <- all.markers[all.markers$cluster == 11,]
cluster12 <- all.markers[all.markers$cluster == 12,]
cluster13 <- all.markers[all.markers$cluster == 13,]
cluster14 <- all.markers[all.markers$cluster == 14,]
cluster15 <- all.markers[all.markers$cluster == 15,]
cluster16 <- all.markers[all.markers$cluster == 16,]

Cluster Annotations

Cluster 0: Endothelial

VlnPlot(mouse.unannotated,
        features = cluster0$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 1: Macrophages

VlnPlot(mouse.unannotated,
        features = cluster1$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 2: Macrophages

VlnPlot(mouse.unannotated,
        features = cluster2$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 3: Fibroblasts

VlnPlot(mouse.unannotated,
        features = cluster3$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 4: T and NK cells

VlnPlot(mouse.unannotated,
        features = cluster4$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 5: Macrophages

VlnPlot(mouse.unannotated,
        features = cluster5$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 6: B cells

VlnPlot(mouse.unannotated,
        features = cluster6$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 7: Endothelial

VlnPlot(mouse.unannotated,
        features = cluster7$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 8: Neutrophils

VlnPlot(mouse.unannotated,
        features = cluster8$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 9: Myeloid precursors

VlnPlot(mouse.unannotated,
        features = cluster9$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 10: ILCs

VlnPlot(mouse.unannotated,
        features = cluster10$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 11: Pericytes and SMCs

VlnPlot(mouse.unannotated,
        features = cluster11$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 12: Fibroblasts

VlnPlot(mouse.unannotated,
        features = cluster12$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 13: ILCs

VlnPlot(mouse.unannotated,
        features = cluster13$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 14: Schwann cells

VlnPlot(mouse.unannotated,
        features = cluster14$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 15: Mast cells

VlnPlot(mouse.unannotated,
        features = cluster15$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 16: Plasma cells

VlnPlot(mouse.unannotated,
        features = cluster16$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Assign Identities

# Rename all identities
mouse.annotated <- RenameIdents(object = mouse.unannotated,
                                "0" = "Endothelial",         # Flt1,Ptprb
                                "1" = "Macrophages",         # Csf1r,Mrc1 
                                "2" = "Macrophages",         # Ccr2       
                                "3" = "Fibroblasts",         # Col1a1      
                                "4" = "T and NK cells",      # Skap1,Cd247     
                                "5" = "Macrophages",         # C1qa,C1qb,C1qc
                                "6" = "B cells",             # Pax5,Ms4a1
                                "7" = "Endothelial",         # Flt1,Vwf
                                "8" = "Neutrophils",         # S100a9,Hp
                                "9" = "Myeloid precursors",  # Mki67  
                                "10" = "ILCs",               # Il7r,Gata3 
                                "11" = "Pericytes and SMCs", # Myh11,Notch3 
                                "12" = "Fibroblasts",        # Col1a1,Col1a2
                                "13" = "ILCs",               # Il1rl1,Gata3
                                "14" = "Schwann cells",      # Mpz,Plp1
                                "15" = "Mast cells",         # Kit
                                "16" = "Plasma cells")       # Sdc1

# save idents
mouse.annotated$annotated_clusters <- Idents(mouse.annotated)

# set levels
mouse.annotated$annotated_clusters <- factor(mouse.annotated$annotated_clusters,
                                             levels = c("Pericytes and SMCs",
                                                        "Endothelial",
                                                        "B cells",
                                                        "Plasma cells",
                                                        "T and NK cells",
                                                        "ILCs",
                                                        "Neutrophils",
                                                        "Macrophages",
                                                        "Myeloid precursors",
                                                        "Mast cells",
                                                        "Fibroblasts",
                                                        "Schwann cells"))

# set ident
Idents(mouse.annotated) <- "annotated_clusters"

Annotated UMAP

# set params
DefaultAssay(mouse.annotated) <- "RNA"
mouse.annotated <- NormalizeData(mouse.annotated)
## Normalizing layer: counts
cluster_colors <- c("#B5B9BA", # Pericytes and SMCs
                    "#40BBFF", # Endothelial
                    "#a5d5a9", # B cells
                    "#5dbd64", # Plasma cells
                    "#1c7e24", # T and NK cells
                    "#F57C7C", # ILCs
                    "#FBB268", # Neutrophils
                    "#FE8D19", # Macrophages
                    "#DE9E83", # Myeloid precursors
                    "#A6CEE3", # Mast cells
                    "#9D7BBA", # Fibroblasts
                    "#977899") # Schwann cells

# umap
umap1 <- DimPlot(object = mouse.annotated, 
                reduction = "umap",
                repel = TRUE,
                group.by = "annotated_clusters",
                cols = cluster_colors)
umap1

umap2 <- DimPlot(object = mouse.annotated, 
                reduction = "umap",
                repel = TRUE,
                dims = c(2,3),
                group.by = "annotated_clusters",
                cols = cluster_colors)
umap2

Cluster tree

# build tree
mouse.annotated <- BuildClusterTree(object = mouse.annotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)
tree <- mouse.annotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)

# plot tree
ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Find markers

Automatically detect markers

# auto find markers
Idents(mouse.annotated) <- "annotated_clusters"
all.markers <- SeuratWrappers::RunPrestoAll(
  object = mouse.annotated,
  assay = "RNA",
  slot = "counts",
  only.pos = TRUE
)
## Calculating cluster Pericytes and SMCs
## Calculating cluster Endothelial
## Calculating cluster B cells
## Calculating cluster Plasma cells
## Calculating cluster T and NK cells
## Calculating cluster ILCs
## Calculating cluster Neutrophils
## Calculating cluster Macrophages
## Calculating cluster Myeloid precursors
## Calculating cluster Mast cells
## Calculating cluster Fibroblasts
## Calculating cluster Schwann cells
# filter based on p_val_adj
all.markers <- all.markers[all.markers$p_val_adj < 0.01,]

# subset table to top 2 and top 20
top2 <- Reduce(rbind,
               by(all.markers,
                  all.markers["cluster"],
                  head,
                  n = 2))
top20 <- Reduce(rbind,
               by(all.markers,
                  all.markers["cluster"],
                  head,
                  n = 20))

# save
write.table(all.markers, 
            paste0(out, "markers/annotated_auto_find_markers_adjpval_0.01.tsv"),
            quote = FALSE,
            row.names = FALSE)

Violin markers

v1 <- VlnPlot(mouse.annotated,
        features = top2$gene,
        split.by = "annotated_clusters",
        flip = TRUE,
        stack = TRUE,
        cols = cluster_colors)
v1

goi <- c("Pdgfrb","Acta2","Vwf","Cldn5","Pecam1","Flt4","Prox1","Lyve1","Ptprc",
         "Cd19","Ms4a1","Ighd","Cd3e","Trbc2","Il7r","Nkg7","Klrb1b","Klrb1c",
         "Gata3","Rora","Itgax","H2-Eb1","Ccr2","Ly6c2","Lyz2","Ly6g","Itgam",
         "Mrc1","Csf1r","Cd38","Mki67","Mcpt4","Ms4a2","Col1a2","Plp1")
v2 <- VlnPlot(mouse.annotated,
              group.by = "annotated_clusters",
              split.by = "annotated_clusters",
              stack = TRUE,
              flip = TRUE,
              cols = cluster_colors,
              features = goi)
v2

Pseudobulk PCA

  • Each sample will have data points equal to the number of annotated_clusters
# Step 1: Pseudo-bulk the counts based on sample and cell type
pseudo <- AggregateExpression(
    object = mouse.annotated, 
    assays = "RNA", 
    features = rownames(mouse.annotated),
    return.seurat = TRUE, 
    group.by = c("sample", "annotated_clusters")
)
## Centering and scaling data matrix
# Step 2: Normalize the data
pseudo <- NormalizeData(pseudo,
                        normalization.method = "LogNormalize",
                        scale.factor = 10000)
## Normalizing layer: counts
# Step 3: Find variable features
pseudo <- FindVariableFeatures(pseudo,
                               selection.method = "vst", 
                               nfeatures = 2000)
## Finding variable features for layer counts
# Step 4: Scale the data
pseudo <- ScaleData(pseudo, 
                    features = rownames(pseudo))
## Centering and scaling data matrix
# Step 5: Run PCA
pseudo <- RunPCA(pseudo, 
                 features = VariableFeatures(pseudo),
                 npcs = 10)
## PC_ 1 
## Positive:  Cyth4, Epsti1, Myo5a, Dclre1c, Cd48, Tnfaip8l2, Il10ra, Evi2a, Ctss, Sh3bp2 
##     Naip6, Slc9a9, Gna15, Sh3bp1, Slc43a2, Sirpa, Efhd2, Clcn5, Tagap, H2-Eb1 
##     H2-Ab1, Ms4a6c, Gm2245, Cd244a, H2-Aa, Ctsc, Nfatc2, Tmem8, Foxred2, Ly86 
## Negative:  Wwtr1, Fbxl7, Myo10, Me3, Adarb1, Plxna2, Nbea, Cxcl12, Efnb1, Myo6 
##     Vangl1, Cttnbp2, Nfib, Tspan18, Arhgap29, Pdgfd, Tshz2, Osmr, Zfp423, Amotl1 
##     Nhsl1, Zfpm2, Rhoj, Pik3r3, Id1, Tns2, Apbb2, Ghr, Msrb3, Rai14 
## PC_ 2 
## Positive:  Icam2, Pecam1, Zfp366, Rasip1, Slfn3, Spns2, Cd300lg, Cmtm8, B3gnt3, Slc2a1 
##     Rapgef5, Pkn3, Ccm2l, Wipf3, Flt4, Kank3, Nova2, Ripply3, Exoc3l2, Chrm2 
##     Tie1, Fam167b, Enpp6, Deup1, Notch4, Dll4, Arhgef15, Ushbp1, Mcf2l, Robo4 
## Negative:  Slc38a2, Olfml3, Serpinf1, Gpx3, Clmp, Shisa3, H19, Slc35g1, Frmpd4, Col12a1 
##     Car13, Mfap4, Prrx2, Clec11a, Cacna2d2, Sfrp4, Ecrg4, Col8a2, Fmod, Foxd1 
##     Lmx1b, Fbln1, Mdk, Lum, Cpxm1, Glrb, Gm29478, Gdf10, Pdgfra, Kcnb2 
## PC_ 3 
## Positive:  Il9r, Ly6d, Iglc3, Pou2af1, Mzb1, Gm43388, Gm34215, 2010309G21Rik, Fcrla, Iglc2 
##     Cd79a, Gm42836, Chst3, Il5ra, Tmem163, Spib, Srpk3, Ms4a1, Tnfrsf13c, Klhl14 
##     Cd79b, Pax5, Siglecg, Cd19, Iglc1, Fcmr, B3gnt5, Gm4610, Fam129c, Fcrl1 
## Negative:  Gm4951, Ophn1, Rab11fip5, Trpv4, Slco2b1, Dab2, Cmklr1, Hfe, Stab1, Hsf3 
##     Plxnb2, Rab3il1, Cxcl16, Wwp1, Cables1, Cln8, Ang, Mcub, Zmynd15, Prune2 
##     Tmem106a, Cd36, Hpgd, Abca9, Serpinb8, Itsn1, Tmem37, Plxna4os1, Tslp, Sdc3 
## PC_ 4 
## Positive:  Ppp1r42, Mirt2, Cebpe, Dhrs9, 9830107B12Rik, Lcn2, Acod1, Trem3, Itgb2l, Trem1 
##     Cstdc4, F5, Olfm4, 4930438A08Rik, Fpr1, Il1f9, Fpr2, Ankrd22, A530064D06Rik, Il1rn 
##     Mapk13, Arg2, Wfdc21, Stfa1, E230014E18Rik, Gm33326, Cxcr2, Slfn4, Nlrp12, Ltf 
## Negative:  Tox, S100a4, Fam49a, Tep1, Unc5cl, Dscam, Myo3b, Gimap7, Btbd11, Gpr174 
##     Trim36, Rnf43, Cd48, Lck, Sidt1, Ctla4, Clnk, Creb5, Tcf7, A430093F15Rik 
##     Il2rb, Themis, Ccl5, Cd28, Cd247, P2rx7, Cd3g, Ms4a4b, Trbc2, Cd3e 
## PC_ 5 
## Positive:  Ctnna3, Kcnj12, Itga7, Slc35f1, Sorcs1, Ppp1r9a, Il34, Ank3, Kcna2, Mcam 
##     C4b, S100a4, Synpo2, Afap1l2, Mpz, Mt3, Lrrc4c, Adamts20, Sema3b, Dlgap1 
##     Tagln, Akap6, Chl1, Nkain2, Kcna1, Ptprz1, Sox10, Abca8a, Cadm2, Plp1 
## Negative:  Tnfsf10, Myzap, Rph3al, Tmem163, Pecam1, Nxpe2, Icam2, Ly6d, Dync1i1, Pik3c2b 
##     Btla, Pou2af1, Iglc3, Fcrla, Blk, F8, Spib, Mzb1, Cd79a, 2010309G21Rik 
##     Gm16070, Cd79b, Ifitm10, Gm43388, Gm34215, Slfn3, Smim9, Iglc2, Siglecg, Trpm6
# Step 6: Visualize PCA
pca_colors <- c("firebrick1","cyan","gold","red3","blue","black","forestgreen",
                    "darkorchid1","green","gray","deeppink","chocolate4","tan",
                    "steelblue","pink","orange")
pca <- DimPlot(pseudo, 
        reduction = "pca",
        group.by = "annotated_clusters",
        cols = pca_colors,
        pt.size = 3)
pca

pdf(paste0(out, "clustering_QC/pseudobulk_pca.pdf"), width = 6, height = 4)
pca
dev.off()
## png 
##   2

Shiny App

# create new object
shiny.obj <- mouse.annotated
VariableFeatures(shiny.obj) <- shiny.obj@assays$SCT@var.features

# set default params
DefaultAssay(shiny.obj) <- "RNA"
Idents(shiny.obj) <- "annotated_clusters"

# create config
names <- colnames(shiny.obj@meta.data)
names <- names[c(22,24,2:21)]
sc.config <- createConfig(obj = shiny.obj,
                          meta.to.include = names)

# change wd
setwd(out)

# output shiny app folder
makeShinyApp(obj = shiny.obj, 
             scConf = sc.config, 
             gene.mapping = TRUE,
             shiny.title = "All Clusters")

# manual config edits
sc1conf <- readRDS("shinyApp/sc1conf.rds")
sc1conf[2,4] <- "#B5B9BA|#40BBFF|#a5d5a9|#5dbd64|#1c7e24|#F57C7C|#FBB268|#FE8D19|#DE9E83|#A6CEE3|#9D7BBA|#977899"
saveRDS(sc1conf, "shinyApp/sc1conf.rds")